from parse_config import Config
from CreateCluster import CreateCluster
from utils import create_data_array
import time
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import multiprocessing as mp
from joblib import Parallel, delayed
from functools import partial
from copy import deepcopy

def call_clustering(data, params):
    """Cluster ``data`` with ``params`` and return the resulting metrics.

    A ``CreateCluster`` is built from ``params`` and its ``cluster_data``
    method is run on ``data``. The labels it returns are discarded.

    Returns:
        A 5-tuple ``(silhouette, sum_of_distances, davies_bouldin,
        params.n_clusters, params.random_state)``, where the first three
        values are read from the result mapping returned by ``cluster_data``.
    """
    print (f"Processing params --  K: {params.n_clusters}, seed: {params.random_state}")
    _, metrics = CreateCluster(params).cluster_data(data)
    # Keys are looked up in the order the values appear in the returned tuple.
    metric_keys = ("Silhouette score", "Sum of distances:", "Davies-Bouldin index:")
    scores = tuple(metrics[key] for key in metric_keys)
    return scores + (params.n_clusters, params.random_state)

if __name__ == "__main__":
    # Script entry point: run the configured clustering once for every
    # (seed, K) combination in parallel, then write the collected metrics
    # to a CSV file in the current working directory.
    start = time.perf_counter()  # monotonic clock, used for the elapsed-time report
    file_path = "config.json"
    params = Config.from_file(file_path)

    # random.seed(params.random_state)
    # Reuse the cached data array when it exists; otherwise build it with
    # create_data_array and cache it next to the features for later runs.
    raw_data_filename = os.path.join(params.features_dir, "train_raw_ns.npy")
    if os.path.exists(raw_data_filename):
        print("Found data array file, loading...")
        train_raw_ns = np.load(raw_data_filename)
    else:
        print("Creating data array..")
        # Only the first of the six returned values is needed here.
        train_raw_ns, _, _, _, _, _ = create_data_array(params.features_dir)
        np.save(raw_data_filename, train_raw_ns)

    print(f"Data raw shape: {train_raw_ns.shape}")
    # seed for shuffling datasets
    # train_raw = train_raw_ns.reshape(train_raw_ns.shape[0], -1) # flat dataset

    k_clusters = [5, 6, 7, 9]  # numbers of clusters to evaluate
    random_seed_vals = [0, 1, 2, 3, 4, 5]
    list_of_params = []
    for _seed in random_seed_vals:
        for k in k_clusters:
            # Each job gets its own copy so the base config is never mutated.
            p = deepcopy(params)
            p.n_clusters = k
            p.random_state = _seed
            list_of_params.append(p)

    print(f"Lenght of List of params before parallel: {len(list_of_params)}")
    retLst = Parallel(n_jobs=4)(
        delayed(call_clustering)(train_raw_ns, job_params)
        for job_params in list_of_params
    )
    print(len(retLst))

    # Every result is a (silhouette, inertia, dbi, K, seed) tuple as returned
    # by call_clustering; build the frame from the records directly and then
    # put the columns in the order used for the CSV output.
    df = pd.DataFrame(
        retLst, columns=["silhouette", "inertia", "dbi", "K", "seed"]
    )[["K", "seed", "silhouette", "dbi", "inertia"]]

    csv_name = (
        f"error_data_{params.cluster_algo}_{params.distance_metric}"
        f"_{k_clusters}_{random_seed_vals}.csv"
    )
    df.to_csv(csv_name)
    print(f"Results written to {csv_name} in {time.perf_counter() - start:.1f}s")

    # # Uncomment the block below to activate "Plotting"
    # # NOTE(review): this draft uses names that are not defined in the visible
    # # script (`k_values`, `results`, `random_seeds`) and iterates over
    # # `len(silhouette)`, which is not iterable -- adapt it before enabling.
    # fig, ax = plt.subplots()
    # data_to_plot = [silhouette[k] for k in len(silhouette)]
    # ax.boxplot(data_to_plot, labels=k_values, showmeans=True)

    # # Customize the plot
    # ax.set_xlabel('K Values')
    # ax.set_ylabel('Metric Score')
    # ax.set_title('K-means Clustering Performance with Different Random Seeds')

    # # Display the plot
    # plt.show()

    # for k, k_results in results.items():
    # print(f"Results for K={k}:")
    # for seed, score in zip(random_seeds, k_results):
    #     print(f" - Random Seed {seed}: {score:.2f}")
    # print(f"Mean: {np.mean(k_results):.2f}")
    # print(f"Standard Deviation: {np.std(k_results):.2f}")
    # print()